# Data Visualization
import pandas as pd
import math
import numpy as np
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns
from cancerolstools import preprocess as pp
from cancerolstools import visuals as vi
# Build the merged cancer/FIPS frame with the helpers from
# cancerolstools.preprocess, then derive the frame used for mapping.
fips_df = pp.import_fips()
cancer_df = pp.merge_data(
    pp.cancer_preprocess(pp.import_cancer()),
    fips_df,
)
graphdata = pp.graph_dataframe(cancer_df)
# Bare expression: shows the frame when run as a notebook cell.
graphdata
| | fips | anomalies |
|---|---|---|
| 0 | 53035 | 0.000000 |
| 1 | 53037 | 0.000000 |
| 2 | 53039 | 0.000000 |
| 3 | 16061 | 0.000000 |
| 4 | 21135 | 0.000000 |
| ... | ... | ... |
| 14223 | 48159 | 0.000000 |
| 14224 | 50011 | 0.000000 |
| 14225 | 51067 | 0.000000 |
| 14226 | 53021 | 0.000000 |
| 14227 | 20061 | 1.140201 |
14228 rows × 2 columns
from urllib.request import urlopen
import json
# Download the county GeoJSON file from the plotly datasets repository.
# NOTE(review): in the visible part of this file `counties` is only referenced
# by commented-out choropleth code — confirm whether this download is still needed.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    # Parse inside the `with` body so the response is still open while it is
    # read; the context manager closes it afterwards.
    counties = json.load(response)
import plotly.express as px
# Build the anomaly map from the fips/anomalies frame using the project's
# visuals helper, then render the returned figure.
fig = vi.anomaly_map(graphdata)
fig.show()
# fig = px.choropleth_mapbox(newbieLOG, geojson=counties, locations='fips', color='anomalies',
# color_continuous_scale="Hot_r",
# range_color=(0, 4),
# mapbox_style="carto-positron",
# zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
# opacity=0.5,
# labels={'unemp':'unemployment rate'}
# )
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.show()
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
# df_race = cancer_df[['TARGET_deathRate', 'medIncome', 'PctWhite', 'PctBlack', 'PctAsian', 'PctOtherRace']]
# df_race = df_race.melt(id_vars=['TARGET_deathRate', 'medIncome'], var_name='variable')
# def get_variable_group(variable):
# if variable == 'pctwhite':
# return 'White'
# elif variable == 'pctblack':
# return 'Black'
# elif variable == 'pctasian':
# return 'Asian'
# else:
# return 'Other'
# df_race['variable_group'] = df_race['variable'].apply(get_variable_group)
# Prepare the frame consumed by the race/income plot via the project's helper.
# NOTE(review): presumably this replaces the commented-out melt/grouping logic
# kept around this line — confirm against cancerolstools.visuals.
race_plot_df = vi.df_race(cancer_df)
# colors = ['red', 'blue', 'green', 'purple']
# color_map = dict(zip(df_race['variable_group'].unique(), colors))
# fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
# for ax, variable in zip(axes.flatten(), df_race['variable'].unique()):
# data = df_race[df_race['variable'] == variable]
# ax.scatter(data['value'], data['TARGET_deathRate'], c=data['variable_group'].apply(lambda x: color_map[x]), s=data['medIncome']/5000, alpha=0.7)
# ax.set_xlabel('Percentage of population by race')
# ax.set_ylabel('Target death rate')
# ax.set_title(variable)
# ax.set_ylim([100, 400])
# Draw the race/income figure from the prepared frame.
fig = vi.race_income_plot(race_plot_df)
# The pyplot-level calls below act on the current figure — presumably the one
# the helper just created; confirm that it leaves that figure active.
plt.suptitle('Impact of race and income on target death rate', fontsize=16)
plt.tight_layout()
# Applied after tight_layout so this top margin is the one that sticks;
# presumably it leaves room for the suptitle above the subplots.
plt.subplots_adjust(top=0.92)
plt.show()